This R script merges the RS data downloaded from the Google Drive folder shared by Bea with the ReSurvey database.
library(tidyverse)
library(here)
library(lubridate)
library(dtplyr)
library(sf)
The files were downloaded from the folder Drive/MOTIVATE-EVEREST/1.VALIDATION/db_Europe/S2/Ene-dic.
# Folder that holds the S2 exports
folder_path <- "C:/Data/MOTIVATE/MOTIVATE_RS_data/S2"
# List CSV files only (searching sub-folders too). Without `pattern`, any other
# file sitting in the tree (e.g. notes or system files) would be handed to
# read_csv() further down.
csv_files <- list.files(
  folder_path,
  pattern = "\\.csv$",
  ignore.case = TRUE,
  full.names = TRUE,
  recursive = TRUE
)
# Extract the biogeographic region code and the unit from a file name.
#
# The part of `filename` before the first "_" is expected to start with one of
# the region codes ALP, ARC, ATL, BOR, CON, MED or PANONIA; whatever follows
# the code inside that first part is taken as the unit.
#
# Args:
#   filename: a single file name (no directory part).
# Returns:
#   A list with two character elements, `biogeo` and `unit`. `unit` is
#   NA_character_ when nothing follows the region code. Both are NA_character_
#   when the name does not start with a known region code (previously this
#   case stopped with "missing value where TRUE/FALSE needed").
extract_info <- function(filename) {
  region_pattern <- "^(ALP|ARC|ATL|BOR|CON|MED|PANONIA)"
  first_word <- strsplit(filename, "_", fixed = TRUE)[[1]][1]

  # Unknown prefix (or empty name): report missing values instead of failing
  # on an NA condition in the `if` below.
  if (is.na(first_word) || !grepl(region_pattern, first_word)) {
    return(list(biogeo = NA_character_, unit = NA_character_))
  }

  biogeo <- regmatches(first_word, regexpr(region_pattern, first_word))
  # Drop the region code; the remainder (possibly empty) is the unit
  unit <- sub(region_pattern, "", first_word)
  if (unit == "") unit <- NA_character_

  list(biogeo = biogeo, unit = unit)
}
# Read every listed file and tag its rows with the region/unit taken from the
# file name. The result is a list with one tibble per file.
data_list <- lapply(csv_files, function(file) {
  # Only the file name (not the directory) carries the region/unit prefix
  info <- extract_info(basename(file))

  # show_col_types = FALSE silences the column-specification banner that
  # readr otherwise prints once per file
  df <- read_csv(file, show_col_types = FALSE)

  # readr's own warning does not say which file is affected, so name it here
  n_issues <- nrow(problems(df))
  if (n_issues > 0) {
    warning(
      n_issues, " parsing issue(s) in ", basename(file),
      "; inspect with problems()",
      call. = FALSE
    )
  }

  df %>%
    # Remove columns that give column type problems when combining data
    select(-starts_with("EUNIS"), -starts_with("ReSurvey")) %>%
    mutate(biogeo = info$biogeo, unit = info$unit)
})
Rows: 1978 Columns: 93
── Column specification ────────────────────────────────────────────────────────────────
Delimiter: ","
chr (21): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, EUNISa_3...
dbl (50): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90, EVI_st...
lgl (22): EUNISa_4, EUNISa_4_d, EUNISb_4, EUNISb_4_d, EUNISc, EUNISc_1, EUNISc_1_d, ...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 205 Columns: 93
── Column specification ────────────────────────────────────────────────────────────────
Delimiter: ","
chr (21): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, EUNISa_3...
dbl (50): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90, EVI_st...
lgl (22): EUNISa_4, EUNISa_4_d, EUNISb_4, EUNISb_4_d, EUNISc, EUNISc_1, EUNISc_1_d, ...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 41 Columns: 93
── Column specification ────────────────────────────────────────────────────────────────
Delimiter: ","
chr (14): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, EUNISa_3...
dbl (50): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90, EVI_st...
lgl (29): EUNISa_4, EUNISa_4_d, EUNISb, EUNISb_1, EUNISb_1_d, EUNISb_2, EUNISb_2_d, ...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 163 Columns: 93
── Column specification ────────────────────────────────────────────────────────────────
Delimiter: ","
chr (14): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, EUNISa_3...
dbl (50): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90, EVI_st...
lgl (29): EUNISa_4, EUNISa_4_d, EUNISb, EUNISb_1, EUNISb_1_d, EUNISb_2, EUNISb_2_d, ...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Warning: One or more parsing issues, call `problems()` on your data frame for details, e.g.:
dat <- vroom(...)
problems(dat)
Rows: 5074 Columns: 90
── Column specification ────────────────────────────────────────────────────────────────
Delimiter: ","
chr (14): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, EUNISa_3...
dbl (47): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90, EVI_st...
lgl (29): EUNISa_4, EUNISa_4_d, EUNISb, EUNISb_1, EUNISb_1_d, EUNISb_2, EUNISb_2_d, ...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 922 Columns: 93
── Column specification ────────────────────────────────────────────────────────────────
Delimiter: ","
chr (20): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, EUNISa_3...
dbl (51): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90, EVI_st...
lgl (22): EUNISa_4, EUNISa_4_d, EUNISb_4, EUNISb_4_d, EUNISc, EUNISc_1, EUNISc_1_d, ...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 95 Columns: 93
── Column specification ────────────────────────────────────────────────────────────────
Delimiter: ","
chr (14): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, EUNISa_3...
dbl (50): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90, EVI_st...
lgl (29): EUNISa_4, EUNISa_4_d, EUNISb, EUNISb_1, EUNISb_1_d, EUNISb_2, EUNISb_2_d, ...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 108 Columns: 93
── Column specification ────────────────────────────────────────────────────────────────
Delimiter: ","
chr (14): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, EUNISa_3...
dbl (50): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90, EVI_st...
lgl (29): EUNISa_4, EUNISa_4_d, EUNISb, EUNISb_1, EUNISb_1_d, EUNISb_2, EUNISb_2_d, ...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 1417 Columns: 93
── Column specification ────────────────────────────────────────────────────────────────
Delimiter: ","
chr (21): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, EUNISa_3...
dbl (50): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90, EVI_st...
lgl (22): EUNISa_4, EUNISa_4_d, EUNISb_4, EUNISb_4_d, EUNISc, EUNISc_1, EUNISc_1_d, ...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 107 Columns: 93
── Column specification ────────────────────────────────────────────────────────────────
Delimiter: ","
chr (21): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, EUNISa_3...
dbl (50): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90, EVI_st...
lgl (22): EUNISa_4, EUNISa_4_d, EUNISb_4, EUNISb_4_d, EUNISc, EUNISc_1, EUNISc_1_d, ...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 918 Columns: 93
── Column specification ────────────────────────────────────────────────────────────────
Delimiter: ","
chr (21): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, EUNISa_3...
dbl (50): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90, EVI_st...
lgl (22): EUNISa_4, EUNISa_4_d, EUNISb_4, EUNISb_4_d, EUNISc, EUNISc_1, EUNISc_1_d, ...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 125 Columns: 93
── Column specification ────────────────────────────────────────────────────────────────
Delimiter: ","
chr (14): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, EUNISa_3...
dbl (50): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90, EVI_st...
lgl (29): EUNISa_4, EUNISa_4_d, EUNISb, EUNISb_1, EUNISb_1_d, EUNISb_2, EUNISb_2_d, ...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Warning: One or more parsing issues, call `problems()` on your data frame for details, e.g.:
dat <- vroom(...)
problems(dat)
Rows: 1827 Columns: 93
── Column specification ────────────────────────────────────────────────────────────────
Delimiter: ","
chr (14): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, EUNISa_3...
dbl (50): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90, EVI_st...
lgl (29): EUNISa_4, EUNISa_4_d, EUNISb, EUNISb_1, EUNISb_1_d, EUNISb_2, EUNISb_2_d, ...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 130 Columns: 93
── Column specification ────────────────────────────────────────────────────────────────
Delimiter: ","
chr (14): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, EUNISa_3...
dbl (50): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90, EVI_st...
lgl (29): EUNISa_4, EUNISa_4_d, EUNISb, EUNISb_1, EUNISb_1_d, EUNISb_2, EUNISb_2_d, ...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 205 Columns: 93
── Column specification ────────────────────────────────────────────────────────────────
Delimiter: ","
chr (14): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, EUNISa_3...
dbl (50): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90, EVI_st...
lgl (29): EUNISa_4, EUNISa_4_d, EUNISb, EUNISb_1, EUNISb_1_d, EUNISb_2, EUNISb_2_d, ...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 8 Columns: 93
── Column specification ────────────────────────────────────────────────────────────────
Delimiter: ","
chr (10): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, RS_CODE, ReSurvey p,...
dbl (50): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90, EVI_st...
lgl (33): EUNISa_2_d, EUNISa_3, EUNISa_3_d, EUNISa_4, EUNISa_4_d, EUNISb, EUNISb_1, ...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 8 Columns: 93
── Column specification ────────────────────────────────────────────────────────────────
Delimiter: ","
chr (14): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, EUNISa_3...
dbl (50): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90, EVI_st...
lgl (29): EUNISa_4, EUNISa_4_d, EUNISb, EUNISb_1, EUNISb_1_d, EUNISb_2, EUNISb_2_d, ...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 1188 Columns: 93
── Column specification ────────────────────────────────────────────────────────────────
Delimiter: ","
chr (20): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, EUNISa_3...
dbl (50): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90, EVI_st...
lgl (23): EUNISa_4, EUNISa_4_d, EUNISb_1, EUNISb_4, EUNISb_4_d, EUNISc, EUNISc_1, EU...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Stack the per-file tibbles into one table
data_RS_S2 <- data_list %>% bind_rows()
# Show the combined tibble
print(data_RS_S2)
# Number of rows for each biogeo/unit combination (show up to 100 rows)
data_RS_S2 %>%
  count(biogeo, unit) %>%
  print(n = 100)
Keep all indices and metrics in case they are useful.
data_RS_S2 <- data_RS_S2 %>%
  # Keep only the needed columns, renaming while selecting. Year, latitude and
  # longitude get an _RS suffix: they are kept only in case they differ from
  # those in the ReSurvey database due to updates based on Ilona's info.
  select(
    PlotObserv, biogeo, unit,
    year_RS = year,
    Lat_RS = Lat_update,
    Lon_RS = Lon_update,
    starts_with("NDVI"),
    starts_with("NDMI"),
    starts_with("NDWI"),
    starts_with("EVI"),
    starts_with("SAVI")
  ) %>%
  # Record which data set these rows come from
  mutate(source = "S2")
# Folder that holds the Landsat exports
folder_path <- "C:/Data/MOTIVATE/MOTIVATE_RS_data/Landsat"
# List CSV files only (searching sub-folders too). Without `pattern`, any other
# file sitting in the tree (e.g. notes or system files) would be handed to
# read_csv() further down.
csv_files <- list.files(
  folder_path,
  pattern = "\\.csv$",
  ignore.case = TRUE,
  full.names = TRUE,
  recursive = TRUE
)
# Extract the biogeographic region code and the unit from a file name.
#
# The part of `filename` before the first "_" is expected to start with one of
# the region codes ALP, ARC, ATL, BOR, CON, MED or PANONIA; whatever follows
# the code inside that first part is taken as the unit.
#
# Args:
#   filename: a single file name (no directory part).
# Returns:
#   A list with two character elements, `biogeo` and `unit`. `unit` is
#   NA_character_ when nothing follows the region code. Both are NA_character_
#   when the name does not start with a known region code (previously this
#   case stopped with "missing value where TRUE/FALSE needed").
extract_info <- function(filename) {
  region_pattern <- "^(ALP|ARC|ATL|BOR|CON|MED|PANONIA)"
  first_word <- strsplit(filename, "_", fixed = TRUE)[[1]][1]

  # Unknown prefix (or empty name): report missing values instead of failing
  # on an NA condition in the `if` below.
  if (is.na(first_word) || !grepl(region_pattern, first_word)) {
    return(list(biogeo = NA_character_, unit = NA_character_))
  }

  biogeo <- regmatches(first_word, regexpr(region_pattern, first_word))
  # Drop the region code; the remainder (possibly empty) is the unit
  unit <- sub(region_pattern, "", first_word)
  if (unit == "") unit <- NA_character_

  list(biogeo = biogeo, unit = unit)
}
# Read every listed file and tag its rows with the region/unit taken from the
# file name. The result is a list with one tibble per file.
data_list <- lapply(csv_files, function(file) {
  # Only the file name (not the directory) carries the region/unit prefix
  info <- extract_info(basename(file))

  # show_col_types = FALSE silences the column-specification banner that
  # readr otherwise prints once per file
  df <- read_csv(file, show_col_types = FALSE)

  # readr's own warning does not say which file is affected, so name it here
  n_issues <- nrow(problems(df))
  if (n_issues > 0) {
    warning(
      n_issues, " parsing issue(s) in ", basename(file),
      "; inspect with problems()",
      call. = FALSE
    )
  }

  df %>%
    # Remove columns that give column type problems when combining data
    select(-starts_with("EUNIS"), -starts_with("ReSurvey")) %>%
    mutate(biogeo = info$biogeo, unit = info$unit)
})
Warning: One or more parsing issues, call `problems()` on your data frame for details, e.g.:
dat <- vroom(...)
problems(dat)
Rows: 5213 Columns: 93
── Column specification ────────────────────────────────────────────────────────────────
Delimiter: ","
chr (21): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, EUNISa_3...
dbl (50): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90, EVI_st...
lgl (22): EUNISa_4, EUNISa_4_d, EUNISb_4, EUNISb_4_d, EUNISc, EUNISc_1, EUNISc_1_d, ...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 423 Columns: 93
── Column specification ────────────────────────────────────────────────────────────────
Delimiter: ","
chr (21): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, EUNISa_3...
dbl (50): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90, EVI_st...
lgl (22): EUNISa_4, EUNISa_4_d, EUNISb_4, EUNISb_4_d, EUNISc, EUNISc_1, EUNISc_1_d, ...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 138 Columns: 93
── Column specification ────────────────────────────────────────────────────────────────
Delimiter: ","
chr (14): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, EUNISa_3...
dbl (50): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90, EVI_st...
lgl (29): EUNISa_4, EUNISa_4_d, EUNISb, EUNISb_1, EUNISb_1_d, EUNISb_2, EUNISb_2_d, ...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 51 Columns: 93
── Column specification ────────────────────────────────────────────────────────────────
Delimiter: ","
chr (14): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, EUNISa_3...
dbl (50): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90, EVI_st...
lgl (29): EUNISa_4, EUNISa_4_d, EUNISb, EUNISb_1, EUNISb_1_d, EUNISb_2, EUNISb_2_d, ...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 117 Columns: 93
── Column specification ────────────────────────────────────────────────────────────────
Delimiter: ","
chr (21): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, EUNISa_3...
dbl (50): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90, EVI_st...
lgl (22): EUNISa_4, EUNISa_4_d, EUNISb_4, EUNISb_4_d, EUNISc, EUNISc_1, EUNISc_1_d, ...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 213 Columns: 93
── Column specification ────────────────────────────────────────────────────────────────
Delimiter: ","
chr (14): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, EUNISa_3...
dbl (50): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90, EVI_st...
lgl (29): EUNISa_4, EUNISa_4_d, EUNISb, EUNISb_1, EUNISb_1_d, EUNISb_2, EUNISb_2_d, ...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 64 Columns: 93
── Column specification ────────────────────────────────────────────────────────────────
Delimiter: ","
chr (14): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, EUNISa_3...
dbl (50): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90, EVI_st...
lgl (29): EUNISa_4, EUNISa_4_d, EUNISb, EUNISb_1, EUNISb_1_d, EUNISb_2, EUNISb_2_d, ...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 92 Columns: 93
── Column specification ────────────────────────────────────────────────────────────────
Delimiter: ","
chr (14): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, EUNISa_3...
dbl (50): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90, EVI_st...
lgl (29): EUNISa_4, EUNISa_4_d, EUNISb, EUNISb_1, EUNISb_1_d, EUNISb_2, EUNISb_2_d, ...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Warning: One or more parsing issues, call `problems()` on your data frame for details, e.g.:
dat <- vroom(...)
problems(dat)
Rows: 9447 Columns: 93
── Column specification ────────────────────────────────────────────────────────────────
Delimiter: ","
chr (14): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, EUNISa_3...
dbl (50): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90, EVI_st...
lgl (29): EUNISa_4, EUNISa_4_d, EUNISb, EUNISb_1, EUNISb_1_d, EUNISb_2, EUNISb_2_d, ...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 6438 Columns: 93
── Column specification ────────────────────────────────────────────────────────────────
Delimiter: ","
chr (21): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, EUNISa_3...
dbl (50): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90, EVI_st...
lgl (22): EUNISa_4, EUNISa_4_d, EUNISb_4, EUNISb_4_d, EUNISc, EUNISc_1, EUNISc_1_d, ...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 96 Columns: 93
── Column specification ────────────────────────────────────────────────────────────────
Delimiter: ","
chr (24): system:index, EUNISa, EUNISa_1_d, EUNISa_2, EUNISa_2_d, EUNISa_3, EUNISa_3...
dbl (50): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90, EVI_st...
lgl (19): EUNISa_1, EUNISa_4, EUNISa_4_d, EUNISb_1, EUNISb_4, EUNISb_4_d, EUNISc_1, ...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 99 Columns: 93
── Column specification ────────────────────────────────────────────────────────────────
Delimiter: ","
chr (14): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, EUNISa_3...
dbl (50): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90, EVI_st...
lgl (29): EUNISa_4, EUNISa_4_d, EUNISb, EUNISb_1, EUNISb_1_d, EUNISb_2, EUNISb_2_d, ...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 185 Columns: 93
── Column specification ────────────────────────────────────────────────────────────────
Delimiter: ","
chr (14): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, EUNISa_3...
dbl (50): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90, EVI_st...
lgl (29): EUNISa_4, EUNISa_4_d, EUNISb, EUNISb_1, EUNISb_1_d, EUNISb_2, EUNISb_2_d, ...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 198 Columns: 93
── Column specification ────────────────────────────────────────────────────────────────
Delimiter: ","
chr (20): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, EUNISa_3...
dbl (50): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90, EVI_st...
lgl (23): EUNISa_4, EUNISa_4_d, EUNISb_1, EUNISb_4, EUNISb_4_d, EUNISc, EUNISc_1, EU...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 20 Columns: 93
── Column specification ────────────────────────────────────────────────────────────────
Delimiter: ","
chr (12): system:index, EUNISa, EUNISa_1_d, EUNISa_2, EUNISa_2_d, EUNISa_3, EUNISa_3...
dbl (50): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90, EVI_st...
lgl (31): EUNISa_1, EUNISa_4, EUNISa_4_d, EUNISb, EUNISb_1, EUNISb_1_d, EUNISb_2, EU...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 2394 Columns: 93
── Column specification ────────────────────────────────────────────────────────────────
Delimiter: ","
chr (21): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, EUNISa_3...
dbl (50): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90, EVI_st...
lgl (22): EUNISa_4, EUNISa_4_d, EUNISb_4, EUNISb_4_d, EUNISc, EUNISc_1, EUNISc_1_d, ...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 160 Columns: 93
── Column specification ────────────────────────────────────────────────────────────────
Delimiter: ","
chr (21): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, EUNISa_3...
dbl (50): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90, EVI_st...
lgl (22): EUNISa_4, EUNISa_4_d, EUNISb_4, EUNISb_4_d, EUNISc, EUNISc_1, EUNISc_1_d, ...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Warning: One or more parsing issues, call `problems()` on your data frame for details, e.g.:
dat <- vroom(...)
problems(dat)
Rows: 2068 Columns: 93
── Column specification ────────────────────────────────────────────────────────────────
Delimiter: ","
chr (20): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, EUNISa_3...
dbl (50): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90, EVI_st...
lgl (23): EUNISa_4, EUNISa_4_d, EUNISb_1, EUNISb_4, EUNISb_4_d, EUNISc, EUNISc_1, EU...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Warning: One or more parsing issues, call `problems()` on your data frame for details, e.g.:
dat <- vroom(...)
problems(dat)
Rows: 9807 Columns: 93
── Column specification ────────────────────────────────────────────────────────────────
Delimiter: ","
chr (26): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, EUNISa_3...
dbl (50): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90, EVI_st...
lgl (17): EUNISa_4, EUNISa_4_d, EUNISb_1, EUNISb_4, EUNISb_4_d, EUNISc_1, EUNISc_4, ...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 308 Columns: 93
── Column specification ────────────────────────────────────────────────────────────────
Delimiter: ","
chr (14): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, EUNISa_3...
dbl (50): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90, EVI_st...
lgl (29): EUNISa_4, EUNISa_4_d, EUNISb, EUNISb_1, EUNISb_1_d, EUNISb_2, EUNISb_2_d, ...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Warning: One or more parsing issues, call `problems()` on your data frame for details, e.g.:
dat <- vroom(...)
problems(dat)
Rows: 16171 Columns: 93
── Column specification ────────────────────────────────────────────────────────────────
Delimiter: ","
chr (14): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, EUNISa_3...
dbl (50): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90, EVI_st...
lgl (29): EUNISa_4, EUNISa_4_d, EUNISb, EUNISb_1, EUNISb_1_d, EUNISb_2, EUNISb_2_d, ...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 2239 Columns: 93
── Column specification ────────────────────────────────────────────────────────────────
Delimiter: ","
chr (20): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, EUNISa_3...
dbl (50): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90, EVI_st...
lgl (23): EUNISa_4, EUNISa_4_d, EUNISb_1, EUNISb_4, EUNISb_4_d, EUNISc, EUNISc_1, EU...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 82 Columns: 93
── Column specification ────────────────────────────────────────────────────────────────
Delimiter: ","
chr (13): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, EUNISa_3...
dbl (51): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90, EVI_st...
lgl (29): EUNISa_4, EUNISa_4_d, EUNISb, EUNISb_1, EUNISb_1_d, EUNISb_2, EUNISb_2_d, ...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 10 Columns: 93
── Column specification ────────────────────────────────────────────────────────────────
Delimiter: ","
chr (14): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, EUNISa_3...
dbl (50): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90, EVI_st...
lgl (29): EUNISa_4, EUNISa_4_d, EUNISb, EUNISb_1, EUNISb_1_d, EUNISb_2, EUNISb_2_d, ...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 422 Columns: 93
── Column specification ────────────────────────────────────────────────────────────────
Delimiter: ","
chr (21): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, EUNISa_3...
dbl (50): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90, EVI_st...
lgl (22): EUNISa_4, EUNISa_4_d, EUNISb_4, EUNISb_4_d, EUNISc, EUNISc_1, EUNISc_1_d, ...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 393 Columns: 93
── Column specification ────────────────────────────────────────────────────────────────
Delimiter: ","
chr (14): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, EUNISa_3...
dbl (50): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90, EVI_st...
lgl (29): EUNISa_4, EUNISa_4_d, EUNISb, EUNISb_1, EUNISb_1_d, EUNISb_2, EUNISb_2_d, ...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 12 Columns: 93
── Column specification ────────────────────────────────────────────────────────────────
Delimiter: ","
chr (10): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, RS_CODE, ReSurvey p,...
dbl (50): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90, EVI_st...
lgl (33): EUNISa_2_d, EUNISa_3, EUNISa_3_d, EUNISa_4, EUNISa_4_d, EUNISb, EUNISb_1, ...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 9 Columns: 93
── Column specification ────────────────────────────────────────────────────────────────
Delimiter: ","
chr (14): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, EUNISa_3...
dbl (50): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90, EVI_st...
lgl (29): EUNISa_4, EUNISa_4_d, EUNISb, EUNISb_1, EUNISb_1_d, EUNISb_2, EUNISb_2_d, ...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 5 Columns: 93
── Column specification ────────────────────────────────────────────────────────────────
Delimiter: ","
chr (14): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, EUNISa_3...
dbl (50): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90, EVI_st...
lgl (29): EUNISa_4, EUNISa_4_d, EUNISb, EUNISb_1, EUNISb_1_d, EUNISb_2, EUNISb_2_d, ...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 37 Columns: 93
── Column specification ────────────────────────────────────────────────────────────────
Delimiter: ","
chr (13): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, EUNISa_3...
dbl (51): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90, EVI_st...
lgl (29): EUNISa_4, EUNISa_4_d, EUNISb, EUNISb_1, EUNISb_1_d, EUNISb_2, EUNISb_2_d, ...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 2042 Columns: 93
── Column specification ────────────────────────────────────────────────────────────────
Delimiter: ","
chr (20): system:index, EUNISa, EUNISa_1, EUNISa_1_d, EUNISa_2, EUNISa_2_d, EUNISa_3...
dbl (50): EVI_max, EVI_mean, EVI_median, EVI_min, EVI_mode, EVI_p10, EVI_p90, EVI_st...
lgl (23): EUNISa_4, EUNISa_4_d, EUNISb_1, EUNISb_4, EUNISb_4_d, EUNISc, EUNISc_1, EU...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Stack the per-file tibbles into one table
data_RS_Landsat <- data_list %>% bind_rows()
# Show the combined tibble
print(data_RS_Landsat)
# Number of rows for each biogeo/unit combination (show up to 100 rows)
data_RS_Landsat %>%
  count(biogeo, unit) %>%
  print(n = 100)
Keep all indices and metrics in case they are useful.
data_RS_Landsat <- data_RS_Landsat %>%
  # Keep only the needed columns, renaming while selecting. Year, latitude and
  # longitude get an _RS suffix: they are kept only in case they differ from
  # those in the ReSurvey database due to updates based on Ilona's info.
  select(
    PlotObserv, biogeo, unit,
    year_RS = year,
    Lat_RS = Lat_update,
    Lon_RS = Lon_update,
    starts_with("NDVI"),
    starts_with("NDMI"),
    starts_with("NDWI"),
    starts_with("EVI"),
    starts_with("SAVI")
  ) %>%
  # Record which data set these rows come from
  mutate(source = "Landsat")
# Read the canopy-height export for the European points
data_RS_CH <-
  "C:/Data/MOTIVATE/MOTIVATE_RS_data/Canopy_Height_1m/Europe_points_CanopyHeight_1m.csv" %>%
  read_csv()
Rows: 425310 Columns: 8
── Column specification ────────────────────────────────────────────────────────────────
Delimiter: ","
chr (2): system:index, .geo
dbl (6): Lat_update, Lon_update, canopy_height, obs_unique, plot_uniqu, year
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Quick look at the table as read
data_RS_CH
# Only the observation identifier and the canopy height are kept
data_RS_CH <- select(data_RS_CH, obs_unique, canopy_height)
The file read below contains the correspondence between obs_unique and PlotObservationID.
# Read the db_Europa table; the path is resolved relative to the project root
# via here()
db_Europa <-
  here("..", "DB_first_check", "data", "clean", "db_Europa_20250107.csv") %>%
  read_csv()
Rows: 425310 Columns: 12
── Column specification ────────────────────────────────────────────────────────────────
Delimiter: ","
chr (6): Country, RS_CODE, ReSurvey site, ReSurvey plot, Expert System, Location method
dbl (6): PlotObservationID, Lon_updated, Lat_updated, plot_unique_id, year, obs_uniq...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
This is the ReSurvey database after updates (to be continued).
# Read the updated ReSurvey database (tab-separated despite the .csv
# extension), forcing every EUNIS column to character and letting readr guess
# the type of all remaining columns.
db_resurv <- local({
  # EUNISa .. EUNISd
  eunis_base <- paste0("EUNIS", c("a", "b", "c", "d"))
  # EUNISa_1 .. EUNISd_4
  eunis_levels <- paste0(rep(eunis_base, each = 4), "_", 1:4)
  # Full set: base columns, level columns, their *_descr companions and
  # EUNIS_assignation
  eunis_cols <- c(
    eunis_base,
    eunis_levels,
    paste0(eunis_levels, "_descr"),
    "EUNIS_assignation"
  )
  # One col_character() collector per EUNIS column, named by column
  eunis_spec <- map(set_names(eunis_cols), ~ col_character())

  read_tsv(
    here("..", "DB_first_check", "data", "clean", "db_resurv_updated_clean.csv"),
    col_types = do.call(cols, c(list(.default = col_guess()), eunis_spec))
  )
})
No parsing issues!
Get sample of ReSurvey database:
# Take the columns of interest from the ReSurvey database and attach
# obs_unique_id from db_Europa. No `by` is given, so the join uses every
# column the two tables share.
db_Europa_sample <- db_resurv %>%
  select(
    PlotObservationID, RS_CODE, `ReSurvey site`, `ReSurvey plot`,
    Lon_updated, Lat_updated, year, date,
    starts_with("EUNIS"), `Location method`,
    # EUNIS_assignation is picked up by starts_with() but is not wanted
    -EUNIS_assignation
  ) %>%
  left_join(
    select(
      db_Europa,
      PlotObservationID, Lon_updated, Lat_updated, year, obs_unique_id
    )
  )
Joining with `by = join_by(PlotObservationID, Lon_updated, Lat_updated, year)`
print(db_Europa_sample, width = Inf)
Add column PLOT to data to identify unique plots:
# Add an integer column PLOT that is shared by all rows of the same plot
# (same RS_CODE, ReSurvey site and ReSurvey plot).
db_Europa_sample <- db_Europa_sample %>%
# Copy the backtick-quoted columns to syntactic names, because the original
# names give problems in the steps below
mutate(RS_site = `ReSurvey site`, RS_plot = `ReSurvey plot`) %>%
# Hand the pipeline to dtplyr (data.table backend) for faster processing
lazy_dt() %>%
# Group by the 3 vars that uniquely identify each plot
group_by(RS_CODE, RS_site, RS_plot) %>%
# PLOT takes the group counter (.GRP), so all rows of a group share one value.
# NOTE(review): the numbering presumably follows data.table's group order;
# confirm before relying on specific ids across runs.
mutate(PLOT = .GRP) %>%
# Materialise the lazy pipeline back into a tibble
as_tibble() %>%
# Drop the temporary helper columns
select(-RS_site, -RS_plot)
Keep only habitats T, R, S and Q and, for each PLOT, keep only the last resurvey:
# Restrict to the habitat groups of interest, then keep, within each PLOT,
# the row(s) carrying the most recent date.
# NOTE(review): ties on the latest date keep several rows per PLOT, and a PLOT
# with any missing date is dropped entirely because max() then returns NA.
# Confirm that `date` is complete and sorts chronologically.
db_Europa_sample_latest <- db_Europa_sample %>%
  filter(EUNISa_1 %in% c("T", "R", "S", "Q")) %>%
  filter(date == max(date), .by = PLOT)
Save as csv for Bea:
# Export the filtered sample as a comma-separated file
write_csv(
  x = db_Europa_sample_latest,
  file = "data/clean/db_Europa_sample_latest.csv"
)
Save as shp to merge with bioregions:
# # Convert to sf object
# db_Europa_sample_latest_sf <- st_as_sf(db_Europa_sample_latest,
# coords = c("Lon_updated", "Lat_updated"),
# crs = 4326) # WGS84
# st_write(db_Europa_sample_latest_sf,
# "C:/GIS/MOTIVATE/shapefiles/db_Europa_sample_latest_sf.shp")
Get only the columns PlotObservationID (original unique identifier) and obs_unique_id (unique identifier created by me).
# Reduce the sample to the two identifier columns
db_Europa_sample_latest <- select(
  db_Europa_sample_latest,
  PlotObservationID, obs_unique_id
)
# Keep every S2 row and attach the matching identifiers. PlotObserv is
# renamed first so that both tables share the PlotObservationID key.
data_RS_S2_ID <- right_join(
  db_Europa_sample_latest,
  rename(data_RS_S2, PlotObservationID = PlotObserv)
)
Joining with `by = join_by(PlotObservationID)`
Now we have PlotObservationID in data_RS_S2_ID.
# data_RS_S2_phen_ID <- db_Europa_sample_latest %>%
# right_join(data_RS_S2_phen %>%
# # Rename to be able to join on this column
# rename(PlotObservationID = PlotObserv))
Once the block above is re-enabled, we will have PlotObservationID in data_RS_S2_phen_ID.
# Keep every Landsat row and attach the matching identifiers. PlotObserv is
# renamed first so that both tables share the PlotObservationID key.
data_RS_Landsat_ID <- right_join(
  db_Europa_sample_latest,
  rename(data_RS_Landsat, PlotObservationID = PlotObserv)
)
Joining with `by = join_by(PlotObservationID)`
Now we have PlotObservationID in data_RS_Landsat_ID.
# Keep every canopy-height row and attach PlotObservationID from db_Europa.
# obs_unique is renamed first so that both tables share the obs_unique_id key.
data_RS_CH_ID <- right_join(
  select(db_Europa, PlotObservationID, obs_unique_id),
  rename(data_RS_CH, obs_unique_id = obs_unique)
)
Joining with `by = join_by(obs_unique_id)`
Now we have PlotObservationID in data_RS_CH_ID.
For some points, there is data both from S2 and Landsat. In those cases, use the S2 data because it is more precise (10 m vs 30 m).
# Tag every index column with its sensor so that the S2 and Landsat values can
# sit side by side after joining; `source` is then redundant and is dropped.
data_RS_S2_ID <- data_RS_S2_ID %>%
  rename_with(
    ~ paste0(.x, "_S2"),
    starts_with(c("NDVI", "NDMI", "NDWI", "EVI", "SAVI"))
  ) %>%
  select(-source)
data_RS_Landsat_ID <- data_RS_Landsat_ID %>%
  rename_with(
    ~ paste0(.x, "_Landsat"),
    starts_with(c("NDVI", "NDMI", "NDWI", "EVI", "SAVI"))
  ) %>%
  select(-source)
Join S2 and Landsat data (the S2_phen join is currently commented out):
# Keep all rows from both sensors. No `by` is given, so every shared column is
# used as a key.
# NOTE(review): the shared columns include the numeric Lat_RS / Lon_RS, so a
# plot whose coordinates differ slightly between the two exports would end up
# on two separate rows. Confirm this cannot happen.
# (The S2_phen table is not joined for now:
#  full_join(data_RS_S2_phen_ID) is disabled.)
data_RS <- full_join(data_RS_S2_ID, data_RS_Landsat_ID)
Joining with `by = join_by(PlotObservationID, obs_unique_id, biogeo, unit, year_RS,
Lat_RS, Lon_RS)`
Number of observations with NDVI_max data from both S2 and Landsat:
# Rows where NDVI_max is available from S2 and from Landsat
data_RS %>%
  filter(!is.na(NDVI_max_S2), !is.na(NDVI_max_Landsat)) %>%
  nrow()
[1] 14518
Difference between NDVI_max values from S2 and Landsat:
# Histogram of |S2 - Landsat| for NDVI_max, one panel per biogeo-unit
data_RS %>%
  filter(!is.na(NDVI_max_S2), !is.na(NDVI_max_Landsat)) %>%
  mutate(diff_NDVI_max = abs(NDVI_max_S2 - NDVI_max_Landsat)) %>%
  ggplot(aes(x = diff_NDVI_max, fill = paste(biogeo, unit, sep = "-"))) +
  geom_histogram(color = "black") +
  facet_wrap(~ paste(biogeo, unit, sep = "-")) +
  # The fill colour only repeats the panel label, so the legend is hidden
  theme(legend.position = "none")
# Same comparison for NDMI_max
data_RS %>%
  filter(!is.na(NDMI_max_S2), !is.na(NDMI_max_Landsat)) %>%
  mutate(diff_NDMI_max = abs(NDMI_max_S2 - NDMI_max_Landsat)) %>%
  ggplot(aes(x = diff_NDMI_max, fill = paste(biogeo, unit, sep = "-"))) +
  geom_histogram(color = "black") +
  facet_wrap(~ paste(biogeo, unit, sep = "-")) +
  theme(legend.position = "none")
There is a large difference between NDVI values from S2 and Landsat. For now, use the S2 data; this is being checked with Bea / Jose.
When values are available from both satellites, use S2:
# Build one "best available" column per index metric: take the S2 value when
# it is present, otherwise fall back to the Landsat value (NA when both are
# missing). The new columns carry the bare metric name, e.g. NDVI_max.
data_RS <- data_RS %>%
  mutate(across(
    matches("^(NDVI|NDMI|NDWI|EVI|SAVI)_(max|mean|median|min|mode|p10|p90|stdDev)_S2$"),
    # coalesce() returns the first non-missing value, element by element.
    # The Landsat partner column is looked up through the .data pronoun, so
    # only columns of data_RS can match: a missing partner raises an error
    # instead of silently picking up an unrelated object, as get() could.
    ~ coalesce(.x, .data[[sub("_S2$", "_Landsat", cur_column())]]),
    .names = "{col}_combined"
  )) %>%
  # Strip the temporary suffix: NDVI_max_S2_combined -> NDVI_max
  rename_with(~ sub("_S2_combined$", "", .x), matches("_S2_combined$"))
Get number of points per biogeo and unit:
# Rows of data_RS per biogeo / unit, stored in column npoints_R
npoints_bioregion_R <- count(data_RS, biogeo, unit, name = "npoints_R")
Read number of points per biogeo and unit from GIS:
# Read the point counts produced in GIS and reshape them to the same
# biogeo / unit keys used in npoints_bioregion_R.
npoints_bioregion_GIS <- read_delim(
  "data/clean/Npoints_bioregion.csv",
  delim = ";"
) %>%
  select(BIOGEO, UNIT, Join_Count) %>%
  mutate(
    # "PAN" is recoded to "PANONIA"
    biogeo = ifelse(BIOGEO == "PAN", "PANONIA", BIOGEO),
    # unit is UNIT with the BIOGEO text removed; PANONIA gets no unit
    unit = ifelse(biogeo == "PANONIA", NA, str_remove(UNIT, BIOGEO)),
    npoints_GIS = Join_Count,
    # Keep only the three columns created here
    .keep = "none"
  )
Rows: 42 Columns: 54
── Column specification ────────────────────────────────────────────────────────────────
Delimiter: ";"
chr (44): name, BIOGEO, UNIT, RS_CODE, RSrvyst, RSrvypl, date, EUNISa, EUNISb, EUNIS...
dbl (5): OID_, Join_Count, TARGET_FID, code, PLOT
num (5): AREA, PltObID, year, Shape_Length, Shape_Area
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Merge both and see differences in n points:
# Put the R and GIS counts side by side, keeping bioregions that appear in
# only one of the two tables
npoints_bioregion_merged <- npoints_bioregion_R %>%
  full_join(npoints_bioregion_GIS)
Joining with `by = join_by(biogeo, unit)`
Nice table to email to Bea:
# Render the comparison as a Markdown table, sorted by biogeo and unit.
# kable() receives the tibble directly: the former print() wrapper also dumped
# the raw tibble to the output before the table was produced.
npoints_bioregion_merged %>%
  arrange(biogeo, unit) %>%
  kable()
| biogeo | unit | npoints_R | npoints_GIS |
|---|---|---|---|
| ALP | ALPS | 5214 | 5360 |
| ALP | ATRA | 423 | 423 |
| ALP | BALKAN | 138 | 138 |
| ALP | DINARIC | NA | 0 |
| ALP | ENNINE | 51 | 54 |
| ALP | NORDIC | 117 | 117 |
| ALP | PYR | 213 | 213 |
| ALP | ROMANIAN | 64 | 64 |
| ANA | TURKEY | NA | 0 |
| ARC | ICELAND | NA | 0 |
| ARC | NORWAY | 92 | 106 |
| ATL | BENELUX | 9447 | 9530 |
| ATL | BRITAIN | 6438 | 6761 |
| ATL | FRANCE | 96 | 97 |
| ATL | IBERIA | 99 | 99 |
| ATL | IRELAND | NA | 0 |
| ATL | NORDIC | NA | 0 |
| BLS | BLACKSEA | NA | 0 |
| BOR | BALTIC | 185 | 185 |
| BOR | FINLAND | 198 | 240 |
| BOR | NORDIC | 20 | 20 |
| CON | AUSTRIA | 2394 | 2439 |
| CON | BALKAN | 160 | 160 |
| CON | FRANCE | 2068 | 2091 |
| CON | GERMANY | 9807 | 9924 |
| CON | ITALICA | 308 | 312 |
| CON | NORDIC | 16171 | 16172 |
| CON | POLAND | 2239 | 2241 |
| CON | ROMANIA | 82 | 82 |
| MAC | ARONESIA | NA | 0 |
| MED | BALEAR | NA | 0 |
| MED | BALKAN | NA | 0 |
| MED | FRANCE | NA | 0 |
| MED | GREECE | 10 | 10 |
| MED | IBERIA | 422 | 422 |
| MED | ITALICA | 393 | 395 |
| MED | PORT | 12 | 12 |
| MED | SICILIA | 9 | 9 |
| MED | TIRRENO | 5 | 5 |
| MED | TURKEY | 37 | 37 |
| PANONIA | NA | 2042 | 2046 |
| STE | PPIC | NA | 0 |
# Attach the RS indices and the canopy height to the ReSurvey database and
# flag, for every observation, which kinds of RS data are available.
db_resurv_RS <- db_resurv %>%
  # obs_unique_id is dropped from both tables before joining, so it is not
  # carried into the result
  left_join(select(data_RS, -obs_unique_id)) %>%
  left_join(select(data_RS_CH_ID, -obs_unique_id)) %>%
  mutate(
    # A source counts as available only when both NDVI_max and NDMI_max exist
    S2_data = !(is.na(NDVI_max_S2) | is.na(NDMI_max_S2)),
    Landsat_data = !(is.na(NDVI_max_Landsat) | is.na(NDMI_max_Landsat)),
    S2_or_Landsat_data = !(is.na(NDVI_max) | is.na(NDMI_max)),
    # S2_phen_data = !is.na(SOS_DOY),
    CH_data = !is.na(canopy_height)
  ) %>%
  # For now, drop the sensor-specific columns and keep the combined ones
  select(-matches("_(S2|Landsat)$"))
Joining with `by = join_by(PlotObservationID)`
Joining with `by = join_by(PlotObservationID)`
# Number of observations with / without each kind of RS data
count(db_resurv_RS, S2_data)
count(db_resurv_RS, Landsat_data)
count(db_resurv_RS, S2_or_Landsat_data)
count(db_resurv_RS, CH_data)
# count(db_resurv_RS, S2_phen_data)
Save clean file for analyses (to be updated continuously due to updates in ReSurvey database and updates on RS data).
# Write the merged table. The file is tab-separated even though its name ends
# in .csv.
write_tsv(
  x = db_resurv_RS,
  file = here("data", "clean", "db_resurv_RS_20250610.csv")
)
# Record the R and package versions used for this run
sessionInfo()
R version 4.5.0 (2025-04-11 ucrt)
Platform: x86_64-w64-mingw32/x64
Running under: Windows 11 x64 (build 26100)
Matrix products: default
LAPACK version 3.12.1
locale:
[1] LC_COLLATE=Spanish_Spain.utf8 LC_CTYPE=Spanish_Spain.utf8
[3] LC_MONETARY=Spanish_Spain.utf8 LC_NUMERIC=C
[5] LC_TIME=Spanish_Spain.utf8
time zone: Europe/Madrid
tzcode source: internal
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] knitr_1.50 sf_1.0-20 dtplyr_1.3.1 here_1.0.1 lubridate_1.9.4
[6] forcats_1.0.0 stringr_1.5.1 dplyr_1.1.4 purrr_1.0.4 readr_2.1.5
[11] tidyr_1.3.1 tibble_3.2.1 ggplot2_3.5.2 tidyverse_2.0.0
loaded via a namespace (and not attached):
[1] sass_0.4.10 utf8_1.2.5 generics_0.1.4 class_7.3-23
[5] KernSmooth_2.23-26 stringi_1.8.7 hms_1.1.3 digest_0.6.37
[9] magrittr_2.0.3 evaluate_1.0.3 grid_4.5.0 timechange_0.3.0
[13] RColorBrewer_1.1-3 fastmap_1.2.0 rprojroot_2.0.4 jsonlite_2.0.0
[17] e1071_1.7-16 DBI_1.2.3 scales_1.4.0 jquerylib_0.1.4
[21] cli_3.6.5 rlang_1.1.6 crayon_1.5.3 units_0.8-7
[25] bit64_4.6.0-1 withr_3.0.2 cachem_1.1.0 yaml_2.3.10
[29] tools_4.5.0 parallel_4.5.0 tzdb_0.5.0 vctrs_0.6.5
[33] R6_2.6.1 proxy_0.4-27 classInt_0.4-11 lifecycle_1.0.4
[37] bit_4.6.0 vroom_1.6.5 pkgconfig_2.0.3 pillar_1.10.2
[41] bslib_0.9.0 gtable_0.3.6 Rcpp_1.0.14 data.table_1.17.2
[45] glue_1.8.0 xfun_0.52 tidyselect_1.2.1 rstudioapi_0.17.1
[49] farver_2.1.2 htmltools_0.5.8.1 labeling_0.4.3 rmarkdown_2.29
[53] compiler_4.5.0